To do: check those branches whose distances < 0.1 but !=0 -- are their substations really that close?
Count Length(km)
1 0.112
This website https://www.powerwatch.org.uk/elf/substations.asp claims low-power substations are ~150-200 meters apart in urban areas, so we can assume (for now) that lines shorter than ~100 m (= 0.1 km) are internal substation lines, while longer lines are transmission lines.
import numpy as np
import pandas as pd
import matplotlib.cm as cm
import matplotlib
import matplotlib.pyplot as plt
from bokeh.io import show,output_notebook,output_file
from bokeh.models import (
ColumnDataSource,
HoverTool,
LogColorMapper,
CustomJS,
Slider,
CheckboxGroup,
Select,
Div,
TapTool
)
from bokeh.palettes import Viridis6 as palette
from bokeh.plotting import figure
from bokeh.layouts import (widgetbox, row)
from bokeh.tile_providers import CARTODBPOSITRON,STAMEN_TONER_BACKGROUND,STAMEN_TONER
from bokeh.sampledata.us_counties import data as counties
from bokeh import events
from bokeh.colors.groups import yellow
import os
import sys
# Make the repository root importable so the local westernintnet package resolves.
sys.path.append("..")
from westernintnet.westernintnet import win_data  # preloaded WECC grid model (buses/branches)
output_notebook()  # render bokeh plots inline in the notebook
from pyproj import Proj, transform
def reproject_wgs_to_itm(x_lon_lat):
    """Project a (lon, lat) pair from WGS84 (EPSG:4326) to web mercator (EPSG:3857).

    NOTE(review): despite the name ("itm"), the target CRS is web mercator
    (for bokeh tile maps), not Irish Transverse Mercator.

    Parameters
    ----------
    x_lon_lat : sequence
        Two-element sequence [lon, lat]; each element may be a scalar or an
        array-like of coordinates (e.g. a county outline).

    Returns
    -------
    list
        [x, y] in web-mercator meters, mirroring the input shapes.
    """
    # Proj(init=...) + module-level transform() are deprecated since pyproj 2;
    # Transformer with always_xy=True keeps the same (lon, lat) argument order.
    from pyproj import Transformer
    transformer = Transformer.from_crs("EPSG:4326", "EPSG:3857", always_xy=True)
    x, y = transformer.transform(x_lon_lat[0], x_lon_lat[1])
    return [x, y]
state_list = ['wa','or','ca','nv','az','ut','nm','co','wy','id','mt','tx']
# Shade only California counties for reference.
# (Filtering over the full state_list ended up covering the whole world,
# so that variant is not used.)
counties = {
    code: county for code, county in counties.items() if county["state"] == "ca"
}
# Project each county outline from lon/lat into web-mercator x/y.
county_xys = [reproject_wgs_to_itm([c["lons"], c["lats"]]) for c in counties.values()]
county_xs = [xy[0] for xy in county_xys]
county_ys = [xy[1] for xy in county_xys]
county_names = [c['name'] for c in counties.values()]
county_color = ['black' for _ in county_names]  # uniform fill for every county
source = ColumnDataSource(data=dict(
    x=county_xs,
    y=county_ys,
    name=county_names,
    color=county_color,
))
# Project both endpoints of every branch into web-mercator for plotting.
from_xy = win_data.branches[['from_lon','from_lat']].apply(reproject_wgs_to_itm, axis=1)
win_data.branches['from_x'] = from_xy.map(lambda xy: xy[0])
win_data.branches['from_y'] = from_xy.map(lambda xy: xy[1])
to_xy = win_data.branches[['to_lon','to_lat']].apply(reproject_wgs_to_itm, axis=1)
win_data.branches['to_x'] = to_xy.map(lambda xy: xy[0])
win_data.branches['to_y'] = to_xy.map(lambda xy: xy[1])
# Snapshot of the projected branch table; makeMap concatenates scenario
# results onto this base so each call starts from clean geometry.
win_data_base_branches = win_data.branches.copy()
def makeMap(congestion_df):
    """Build a bokeh map of the WECC grid with branches colored by congestion.

    Branch severity is classified from the pre-computed congestion z-score:
    gray (z < 1), light red (1 <= z < 2), dark red (z >= 2), and purple for
    high-z branches that also hit 100% utilization for at least one hour.

    Parameters
    ----------
    congestion_df : pandas.DataFrame
        Per-scenario congestion results, row-aligned with
        win_data_base_branches; must provide 'zscore' and 'hutil1' columns.

    Returns
    -------
    bokeh figure, ready for show().
    """
    win_data_branches = pd.concat([win_data_base_branches, congestion_df], axis=1)
    # NOTE: an earlier version derived per-branch colors from a matplotlib jet
    # colormap over 'hutil>=0p75', but that result was immediately overwritten
    # by the hard-coded palette below — the dead colormap code is removed.
    # gray, light red, dark red, purple for increasing congestion
    colors = ['#AAB7BB', '#FF5733', '#C70039', '#FF00FF']
    # thin for uncongested, thick for every congested class
    linewidths = [0.5, 5.0, 5.0, 5.0]
    # TO DO: Add shape for non-transmission lines
    zscore_t1 = 2.  # severe-congestion threshold
    zscore_t2 = 1.  # moderate-congestion threshold
    zs = win_data_branches['zscore']
    severe = zs >= zscore_t1
    moderate = (zs >= zscore_t2) & (zs < zscore_t1)
    mild = zs < zscore_t2
    win_data_branches.loc[severe, 'color'] = colors[2]
    win_data_branches.loc[moderate, 'color'] = colors[1]
    win_data_branches.loc[mild, 'color'] = colors[0]
    win_data_branches.loc[severe, 'linewidth'] = linewidths[2]
    win_data_branches.loc[moderate, 'linewidth'] = linewidths[1]
    win_data_branches.loc[mild, 'linewidth'] = linewidths[0]
    # Lines with 100% congestion for at least 1 hour are highlighted purple.
    saturated = severe & (win_data_branches['hutil1'] >= 1)
    win_data_branches.loc[saturated, 'color'] = colors[3]
    win_data_branches.loc[saturated, 'linewidth'] = linewidths[3]
    # Set up the figure on a web-mercator tile background.
    TOOLS = "pan,wheel_zoom,reset,hover,save"
    p = figure(
        title="WECC Energy Grid", tools=TOOLS,
        x_axis_location=None, y_axis_location=None,
        plot_width=800, plot_height=800)
    p.add_tile(CARTODBPOSITRON)
    # County outlines (shaded CA counties from the module-level `source`).
    p.patches('x', 'y', source=source, fill_color={'field':'color'}, fill_alpha=0.1, line_color="white", line_width=0.5)
    # Branches drawn as straight segments between projected endpoints.
    p.multi_line(win_data_branches[['from_x','to_x']].values.tolist(),
                 win_data_branches[['from_y','to_y']].values.tolist(),
                 line_color=win_data_branches['color'], line_width=win_data_branches['linewidth'])
    # Markers at branch origins (presumably substations — TODO confirm).
    p.circle(win_data_branches[['from_x']].values.tolist(),
             win_data_branches[['from_y']].values.tolist(),
             size=20,
             color="#1C9099",
             line_width=2
             )
    return p
# Load each scenario's congestion results and build its map.
datadir = os.path.join('..','data/')
scenarios = ['western_scenario_Update01','california2020Test01','california2030Test01',
             'california2020_fixCalCong','california2030_fixCalCong','california2020_westTarget','california2030_westTarget']
labels = ['2016','With CA 2020 goals','With CA 2030 goals',
          'With CA 2020 goals and doubled transmission capacity on congested lines',
          'With CA 2030 goals and doubled transmission capacity on congested lines',
          'WECC 2020 with same targets as CA 2020',
          'WECC 2030 with same targets as CA 2030'
          ]
congestion_map = {}
congestion_dfs = {}
for scenario in scenarios:
    csv_path = datadir + 'congestion_' + scenario + '.csv'
    congestion_dfs[scenario] = pd.read_csv(csv_path)
    congestion_map[scenario] = makeMap(congestion_dfs[scenario])
# Display every scenario map with its human-readable label.
for scenario, label in zip(scenarios, labels):
    print(label)
    show(congestion_map[scenario])
Some summary statistics
# Lines shorter than the typical substation extent are treated as internal
# (non-transmission) substation lines.
substation_size = 0.1  # km
for scenario in scenarios:
    print(scenario)
    df = congestion_dfs[scenario]
    df['isTransmissionLine'] = (df['dist'] > substation_size).astype(int)
# Optional sanity checks; flip debug to 1 to run them.
debug = 0
if debug == 1:
    # Compare mean branch properties between transmission / non-transmission lines.
    check_cols = ['Capacity','hutil>=0p75','dist','zscore','pvalue','isTransmissionLine']
    for scenario in scenarios:
        print(scenario)
        print(congestion_dfs[scenario][check_cols].groupby('isTransmissionLine').mean())
        print()
if debug == 1:
    # Describe the congested-line populations split by the transmission flag.
    allLines = {}
    nonZeroDistLines = {}
    zeroDistLines = {}
    for scenario in scenarios:
        df = congestion_dfs[scenario]
        # "congested" = z-score >= 1 among rated (Capacity < 99999) branches
        congested = (df['zscore'] >= 1) & (df['Capacity'] < 99999)
        nonZeroDistLines[scenario] = df.loc[congested & (df['isTransmissionLine'] == 1)]
        zeroDistLines[scenario] = df.loc[congested & (df['isTransmissionLine'] == 0)]
        print(scenario)
        print('isTransmission')
        print(nonZeroDistLines[scenario][['Capacity','dist','zscore']].describe())
        print('isNotTransmission')
        print(zeroDistLines[scenario][['Capacity','dist','zscore']].describe())
        print()
# Column keys for the per-scenario stats table built below. Each key has a
# human-readable counterpart at the same position in nice_colnames.
col_names = ['Total_hours_100',
'Total_hours_90',
'Total_hours_75',
'N_congested_all',
'N_congested_dist_nonzero',
'N_congested_dist_zero',
'mean_dist_all',
'mean_dist_nonzero',
'mean_dist_zero',
'mean_capacity_all',
'mean_capacity_nonzero',
'mean_capacity_zero',
'mean_util_75_all',
'mean_util_75_nonzero',
'mean_util_75_zero',
'mean_util_90_all',
'mean_util_90_nonzero',
'mean_util_90_zero'
]
# Display labels, positionally aligned with col_names (used for the CSV export).
nice_colnames = ['Total hours of 100% congestion',
'Total hours of >=90% congestion',
'Total hours of >=75% congestion',
'Count of all congested lines',
'Count of congested transmission lines',
'Count of congested non-transmission lines',
'Mean length of all congested lines',
'Mean length of congested transmission lines',
'Mean length of congested non-transmission lines',
'Mean capacity (MVA) of all congested lines',
'Mean capacity (MVA) of congested transmission lines',
'Mean capacity (MVA) of congested non-transmission lines',
'Mean number of hours at 75% congestion, all congested lines',
'Mean number of hours at 75% congestion, congested transmission lines',
'Mean number of hours at 75% congestion, congested non-transmission lines',
'Mean number of hours at 90% congestion, all congested lines',
'Mean number of hours at 90% congestion, congested transmission lines',
'Mean number of hours at 90% congestion, congested non-transmission lines'
]
Make stats table
# Build the per-scenario congestion summary table.
allLines = {}
nonZeroDistLines = {}
zeroDistLines = {}
stats = pd.DataFrame(columns=col_names, index=scenarios)
for s in scenarios:
    df = congestion_dfs[s]
    # "congested" = z-score >= 1 among rated (Capacity < 99999) branches.
    cond75 = df['zscore'] >= 1
    condCap = df['Capacity'] < 99999
    condLine = df['isTransmissionLine'] == 1
    condNotLine = df['isTransmissionLine'] == 0
    allLines[s] = df.loc[condCap & cond75]
    nonZeroDistLines[s] = df.loc[condCap & cond75 & condLine]
    zeroDistLines[s] = df.loc[condCap & cond75 & condNotLine]
    # Fix: use single-step stats.loc[row, col] indexing. The original chained
    # form stats.loc[s]['col'] = ... assigns into a temporary row object and
    # is not guaranteed to write back into `stats` (pandas chained-assignment
    # pitfall).
    stats.loc[s, 'Total_hours_100'] = df['hutil1'].sum()
    stats.loc[s, 'Total_hours_90'] = df['hutil>=0p9'].sum()
    stats.loc[s, 'Total_hours_75'] = df['hutil>=0p75'].sum()
    stats.loc[s, 'N_congested_all'] = len(allLines[s])
    stats.loc[s, 'N_congested_dist_nonzero'] = len(nonZeroDistLines[s])
    stats.loc[s, 'N_congested_dist_zero'] = len(zeroDistLines[s])
    stats.loc[s, 'mean_dist_all'] = allLines[s]['dist'].mean()
    stats.loc[s, 'mean_dist_nonzero'] = nonZeroDistLines[s]['dist'].mean()
    stats.loc[s, 'mean_dist_zero'] = zeroDistLines[s]['dist'].mean()
    stats.loc[s, 'mean_capacity_all'] = allLines[s]['Capacity'].mean()
    stats.loc[s, 'mean_capacity_nonzero'] = nonZeroDistLines[s]['Capacity'].mean()
    stats.loc[s, 'mean_capacity_zero'] = zeroDistLines[s]['Capacity'].mean()
    stats.loc[s, 'mean_util_75_all'] = allLines[s]['hutil>=0p75'].mean()
    stats.loc[s, 'mean_util_75_nonzero'] = nonZeroDistLines[s]['hutil>=0p75'].mean()
    stats.loc[s, 'mean_util_75_zero'] = zeroDistLines[s]['hutil>=0p75'].mean()
    stats.loc[s, 'mean_util_90_all'] = allLines[s]['hutil>=0p9'].mean()
    stats.loc[s, 'mean_util_90_nonzero'] = nonZeroDistLines[s]['hutil>=0p9'].mean()
    stats.loc[s, 'mean_util_90_zero'] = zeroDistLines[s]['hutil>=0p9'].mean()
# Attach the human-readable labels and write the summary table (only once,
# so an existing export is never clobbered).
label_col = pd.Series(nice_colnames, index=col_names)
outfile = pd.concat([stats.T, label_col], join='inner', axis=1)
outfile.rename(columns={0: 'nice_labels'}, inplace=True)
outfile = outfile[['nice_labels'] + scenarios]
out_path = datadir + '/' + 'CA2045_scenarios_summary_stats.csv'
if not os.path.exists(out_path):
    outfile.to_csv(out_path)
stats.T.head(30)
Set up groups for plotting
# Scenario groupings for side-by-side plots: 2020-targets set and 2030-targets set.
sc_set1 = ['western_scenario_Update01','california2020Test01','california2020_fixCalCong','california2020_westTarget']
sc_set2 = ['western_scenario_Update01','california2030Test01','california2030_fixCalCong','california2030_westTarget']
# Column groupings: counts, capacities, and utilization hours.
count_grp = ['N_congested_all', 'N_congested_dist_nonzero', 'N_congested_dist_zero']
capacity_grp = ['mean_capacity_all', 'mean_capacity_nonzero', 'mean_capacity_zero']
util_grp = ['mean_util_75_all', 'mean_util_75_nonzero', 'mean_util_75_zero',
'mean_util_90_all', 'mean_util_90_nonzero', 'mean_util_90_zero']
grp_set = [count_grp, capacity_grp, util_grp]
tgrp_set = [sc_set1, sc_set2]
# Quick-look bar charts of the summary table.
stats.T.plot(kind='bar', figsize=[20,8])
stats[util_grp].T.plot(kind='bar', figsize=[20,8], title ='Mean Congested Hours, 75% and 90% level')
stats[count_grp].T.plot(kind='bar', figsize=[10,8], title ='Number of Congested Lines, 75% level')
stats[capacity_grp].T.plot(kind='bar', figsize=[10,8], title ='Mean Capacity (MVA) of Congested Lines, 75% level')
# Same charts restricted to each scenario group (2020 set vs 2030 set).
for g in tgrp_set:
    stats.T[g].plot(kind='bar', figsize=[16,6])
stats.T.columns  # inspect available row labels of the transposed table
for g in tgrp_set:
    stats[util_grp].T[g].plot(kind='bar', figsize=[20,8], title = 'Mean Utility (Hours), 75% and 90%, for Congested Lines')
# One chart per column grouping, all scenarios.
for g in grp_set:
    stats[g].plot(kind='bar')
# TODO: split this tall subplot figure into meaningful groups.
stats.plot(kind='bar', subplots=True, figsize = [10,40])
Overall branch characteristics stats
# Overall branch-population counts for the base (2016) scenario.
s1 = 'western_scenario_Update01'
df1 = congestion_dfs[s1]
cc1 = df1['Capacity'] == 99999   # unrated branches (rateA sentinel value)
cc2 = df1['Capacity'] < 99999    # rated branches
cc3 = df1['dist'] == 0.0         # zero-length (intra-substation) branches
cc4 = df1['dist'] != 0.0         # finite-length branches
df2 = df1.loc[cc1]
df3 = df1.loc[cc1 & cc3]
df4 = df1.loc[cc2 & cc4]
df5 = df1.loc[cc2 & cc3]
print('Total number of WECC branches = ', len(df1))
print('Total number of rateA == 0 branches = ', len(df2))
print('Number of input branches into congestion analysis = ', len(df1) - len(df2))
print('Total number of dist == 0 and rateA == 0 branches = ', len(df3))
print('Total number of dist != 0 and rateA != 0 branches = ', len(df4))
print('Total number of dist == 0 and rateA != 0 branches = ', len(df5))
# Scratch inspection cells.
grp_set
['mean_util_75_all', 'mean_util_75_nonzero', 'mean_util_75_zero']+['mean_util_90_all', 'mean_util_90_nonzero', 'mean_util_90_zero']
stats.head()
stats.T.head()
stats.T.index
# Fix: each `stats.T` produces a fresh transposed copy, so the original
# per-row assignments `stats.T.loc[x[0],'nice_label'] = x[1]` were silently
# discarded. Bind the transpose once so the labels actually persist.
statsT = stats.T
for raw_name, nice_name in zip(col_names, nice_colnames):
    statsT.loc[raw_name, 'nice_label'] = nice_name
statsT.loc['N_congested_all']
statsT
scenarios